Jacob Fiksel
March 12, 2019
library(sf)
usa <- read_sf("data/cb_2017_us_state_20m.shp")
usa
Simple feature collection with 52 features and 9 fields
geometry type: MULTIPOLYGON
dimension: XY
bbox: xmin: -179.1743 ymin: 17.91377 xmax: 179.7739 ymax: 71.35256
epsg (SRID): 4269
proj4string: +proj=longlat +datum=NAD83 +no_defs
# A tibble: 52 x 10
STATEFP STATENS AFFGEOID GEOID STUSPS NAME LSAD ALAND AWATER
<chr> <chr> <chr> <chr> <chr> <chr> <chr> <dbl> <dbl>
1 02 017855… 0400000… 02 AK Alas… 00 1.48e12 2.78e11
2 06 017797… 0400000… 06 CA Cali… 00 4.03e11 2.05e10
3 08 017797… 0400000… 08 CO Colo… 00 2.68e11 1.18e 9
4 11 017023… 0400000… 11 DC Dist… 00 1.58e 8 1.87e 7
5 16 017797… 0400000… 16 ID Idaho 00 2.14e11 2.39e 9
6 17 017797… 0400000… 17 IL Illi… 00 1.44e11 6.21e 9
7 19 017797… 0400000… 19 IA Iowa 00 1.45e11 1.08e 9
8 21 017797… 0400000… 21 KY Kent… 00 1.02e11 2.39e 9
9 22 016295… 0400000… 22 LA Loui… 00 1.12e11 2.37e10
10 24 017149… 0400000… 24 MD Mary… 00 2.52e10 6.98e 9
# … with 42 more rows, and 1 more variable: geometry <MULTIPOLYGON [°]>
st_geometry(usa)
Geometry set for 52 features
geometry type: MULTIPOLYGON
dimension: XY
bbox: xmin: -179.1743 ymin: 17.91377 xmax: 179.7739 ymax: 71.35256
epsg (SRID): 4269
proj4string: +proj=longlat +datum=NAD83 +no_defs
First 5 geometries:
MULTIPOLYGON (((-173.0746 60.70466, -172.9126 6...
MULTIPOLYGON (((-118.594 33.4672, -118.4848 33....
MULTIPOLYGON (((-109.06 38.49999, -109.06 38.49...
MULTIPOLYGON (((-77.11976 38.93434, -77.04102 3...
MULTIPOLYGON (((-117.243 44.39097, -117.2151 44...
library(tidyverse)
usa_48 <-
usa %>%
filter(!(STUSPS %in% c('AK', 'HI', 'PR')))
state_areas <-
usa %>%
mutate(area = st_area(.)) %>%
st_set_geometry(NULL) %>%
select(NAME, area) %>%
arrange(desc(area))
state_areas
# A tibble: 52 x 2
NAME area
<chr> <S3: units>
1 Alaska 1.560670e+12 [m^2]
2 Texas 6.926629e+11 [m^2]
3 California 4.105170e+11 [m^2]
4 Montana 3.806071e+11 [m^2]
5 New Mexico 3.149147e+11 [m^2]
6 Arizona 2.952381e+11 [m^2]
7 Nevada 2.863439e+11 [m^2]
8 Colorado 2.695815e+11 [m^2]
9 Wyoming 2.533501e+11 [m^2]
10 Oregon 2.513754e+11 [m^2]
# … with 42 more rows
usa_48 %>%
ggplot() +
geom_sf()
socviz package which has opioid death rates
by states over timelibrary(socviz)
data(opiates)
head(opiates[,c("adjusted", "abbr")])
# A tibble: 6 x 2
adjusted abbr
<dbl> <chr>
1 0.8 AL
2 4 AK
3 4.7 AZ
4 1.1 AR
5 4.5 CA
6 3.7 CO
opiates object has the state names in the abbr column,
while our usa_48 object has the state names in the STUSPS columnopiates <-
opiates %>%
select(-state) %>%
rename(STUSPS = abbr) %>%
complete(STUSPS, year)
sf object with this tibble, just as if itself
were a tibbleusa_48 <- left_join(usa_48, opiates, by = "STUSPS")
head(usa_48)
Simple feature collection with 6 features and 18 fields
geometry type: MULTIPOLYGON
dimension: XY
bbox: xmin: -124.4096 ymin: 32.53416 xmax: -114.1391 ymax: 42.00925
epsg (SRID): 4269
proj4string: +proj=longlat +datum=NAD83 +no_defs
# A tibble: 6 x 19
STATEFP STATENS AFFGEOID GEOID STUSPS NAME LSAD ALAND AWATER year
<chr> <chr> <chr> <chr> <chr> <chr> <chr> <dbl> <dbl> <int>
1 06 017797… 0400000… 06 CA Cali… 00 4.03e11 2.05e10 1999
2 06 017797… 0400000… 06 CA Cali… 00 4.03e11 2.05e10 2000
3 06 017797… 0400000… 06 CA Cali… 00 4.03e11 2.05e10 2001
4 06 017797… 0400000… 06 CA Cali… 00 4.03e11 2.05e10 2002
5 06 017797… 0400000… 06 CA Cali… 00 4.03e11 2.05e10 2003
6 06 017797… 0400000… 06 CA Cali… 00 4.03e11 2.05e10 2004
# … with 9 more variables: fips <int>, deaths <int>, population <int>,
# crude <dbl>, adjusted <dbl>, adjusted_se <dbl>, region <ord>,
# division_name <chr>, geometry <MULTIPOLYGON [°]>
usa_48 %>%
filter(year > 1999) %>%
ggplot(aes(fill = adjusted)) +
geom_sf() +
facet_wrap(~year, ncol = 4) +
scale_fill_viridis_c(option = "plasma") +
theme(legend.position = "bottom",
strip.background = element_blank()) +
labs(fill = "Death rate per 100,000 population ",
title = "Opiate Related Deaths by State, 2000-2014")
library(gganimate)
usa_48 %>%
filter(year > 1999) %>%
ggplot(aes(fill = adjusted)) +
geom_sf()
scale_fill_viridis_c(option = "plasma") +
labs(fill = "Death rate per 100,000 population ",
title = "Opiate Related Deaths by State, {frame_time}") +
transition_time(as.integer(year)) +
ease_aes('bounce-in-out')
library(sf)
library(tidyverse)
library(lubridate)
crimes <- read.csv("data/BPD_Part_1_Victim_Based_Crime_Data.csv")
shootings <-
crimes %>%
mutate(CrimeDate = as.character(CrimeDate),
CrimeDate = mdy(CrimeDate),
month = month(CrimeDate),
year = year(CrimeDate)) %>%
filter(!is.na(Latitude), !is.na(Longitude), Description == "SHOOTING") %>%
select(Latitude, Longitude, month, year)
shootings_sf <- st_as_sf(shootings, coords = c("Longitude", "Latitude"))
st_geometry(shootings_sf)
Geometry set for 2764 features
geometry type: POINT
dimension: XY
bbox: xmin: -76.70956 ymin: 39.22226 xmax: -76.53015 ymax: 39.37195
epsg (SRID): NA
proj4string: NA
First 5 geometries:
POINT (-76.55572 39.32584)
POINT (-76.59638 39.31567)
POINT (-76.65384 39.30471)
POINT (-76.63653 39.30819)
POINT (-76.63653 39.30819)
neighborhoods <-
read_sf("data/baltimore_neighborhoods/Neighborhoods.shp") %>%
select(Name)
st_geometry(neighborhoods)
Geometry set for 278 features
geometry type: MULTIPOLYGON
dimension: XY
bbox: xmin: 1393927 ymin: 557733.6 xmax: 1445503 ymax: 621406.8
epsg (SRID): NA
proj4string: +proj=lcc +lat_1=38.3 +lat_2=39.45 +lat_0=37.66666666666666 +lon_0=-77 +x_0=399999.9999999999 +y_0=0 +datum=NAD83 +units=us-ft +no_defs
First 5 geometries:
MULTIPOLYGON (((1422345 603620.8, 1422192 60361...
MULTIPOLYGON (((1404990 592042, 1404990 592036....
MULTIPOLYGON (((1434377 608229.7, 1434487 60820...
MULTIPOLYGON (((1401059 612450.6, 1401005 61251...
MULTIPOLYGON (((1437179 597502.8, 1437145 59753...
neighborhoods <- st_transform(neighborhoods, "+proj=longlat")
st_geometry(neighborhoods)
Geometry set for 278 features
geometry type: MULTIPOLYGON
dimension: XY
bbox: xmin: -76.71141 ymin: 39.19723 xmax: -76.52967 ymax: 39.372
epsg (SRID): 4326
proj4string: +proj=longlat +ellps=WGS84 +no_defs
First 5 geometries:
MULTIPOLYGON (((-76.61113 39.32344, -76.61167 3...
MULTIPOLYGON (((-76.67263 39.29184, -76.67262 3...
MULTIPOLYGON (((-76.56852 39.33594, -76.56814 3...
MULTIPOLYGON (((-76.68626 39.3479, -76.68646 39...
MULTIPOLYGON (((-76.5588 39.30646, -76.55892 39...
st_crs(shootings_sf) <- st_crs(neighborhoods)
shootings_in_bmore_neighborhoods <-
shootings_sf %>%
st_crop(st_bbox(neighborhoods)) %>%
st_intersection(neighborhoods)
shootings_in_bmore_neighborhoods
Simple feature collection with 2764 features and 3 fields
geometry type: POINT
dimension: XY
bbox: xmin: -76.70956 ymin: 39.22226 xmax: -76.53015 ymax: 39.37195
epsg (SRID): 4326
proj4string: +proj=longlat +ellps=WGS84 +no_defs
First 10 features:
month year Name geometry
172 11 2018 Allendale POINT (-76.67467 39.28939)
566 5 2018 Allendale POINT (-76.67961 39.29269)
684 3 2018 Allendale POINT (-76.67817 39.28942)
685 3 2018 Allendale POINT (-76.67817 39.28942)
686 3 2018 Allendale POINT (-76.67817 39.28942)
1073 8 2017 Allendale POINT (-76.67352 39.29022)
1136 7 2017 Allendale POINT (-76.68107 39.29169)
1137 7 2017 Allendale POINT (-76.68107 39.29169)
1319 3 2017 Allendale POINT (-76.67544 39.29144)
1320 3 2017 Allendale POINT (-76.67544 39.29144)
ggplot() +
geom_sf(data = neighborhoods) +
geom_sf(data = shootings_in_bmore_neighborhoods, alpha = .4)
st_geometry(shootings_in_bmore_neighborhoods) <- NULL
shootings_by_neighborhood <-
shootings_in_bmore_neighborhoods %>%
group_by(Name) %>%
summarize(nshootings = n())
neighborhoods <-
left_join(neighborhoods, shootings_by_neighborhood, by = "Name")
neighborhoods
Simple feature collection with 278 features and 2 fields
geometry type: MULTIPOLYGON
dimension: XY
bbox: xmin: -76.71141 ymin: 39.19723 xmax: -76.52967 ymax: 39.372
epsg (SRID): 4326
proj4string: +proj=longlat +ellps=WGS84 +no_defs
# A tibble: 278 x 3
Name nshootings geometry
<chr> <int> <MULTIPOLYGON [°]>
1 Abell NA (((-76.61113 39.32344, -76.61167 39.32343, -…
2 Allendale 21 (((-76.67263 39.29184, -76.67262 39.29182, -…
3 Arcadia 4 (((-76.56852 39.33594, -76.56814 39.33588, -…
4 Arlington 21 (((-76.68626 39.3479, -76.68646 39.3481, -76…
5 Armistead Gard… 2 (((-76.5588 39.30646, -76.55892 39.30654, -7…
6 Ashburton 5 (((-76.67465 39.32513, -76.67556 39.3254, -7…
7 Baltimore High… 15 (((-76.56954 39.29424, -76.56954 39.29431, -…
8 Patterson Park… 3 (((-76.57253 39.29477, -76.57205 39.2948, -7…
9 Barclay 23 (((-76.60948 39.31215, -76.60948 39.31206, -…
10 Barre Circle 2 (((-76.62756 39.28383, -76.62783 39.28382, -…
# … with 268 more rows
bmore_shootings <-
neighborhoods %>%
mutate(nshootings = ifelse(is.na(nshootings), 0, nshootings)) %>%
ggplot(aes(fill = nshootings)) +
geom_sf() +
scale_fill_viridis_c(option = "plasma") +
guides(fill=guide_legend(title=NULL)) +
ggtitle("Number of shootings by neighborhood \nJan. 2015 - Feb. 2019")